{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hyper-Parameter tunning in scikit-learn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Importing libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "from sklearn.metrics import mean_squared_error\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data preparation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# importing data\n",
    "data_path= '../data/diamonds.csv'\n",
    "diamonds = pd.read_csv(data_path)\n",
    "diamonds = pd.concat([diamonds, pd.get_dummies(diamonds['cut'], prefix='cut', drop_first=True)],axis=1)\n",
    "diamonds = pd.concat([diamonds, pd.get_dummies(diamonds['color'], prefix='color', drop_first=True)],axis=1)\n",
    "diamonds = pd.concat([diamonds, pd.get_dummies(diamonds['clarity'], prefix='clarity', drop_first=True)],axis=1)\n",
    "diamonds.drop(['cut','color','clarity'], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Preparing objects for modelling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import RobustScaler\n",
    "from sklearn.model_selection import train_test_split\n",
    "target_name = 'price'\n",
    "robust_scaler = RobustScaler()\n",
    "X = diamonds.drop('price', axis=1)\n",
    "X = robust_scaler.fit_transform(X)\n",
    "y = diamonds[target_name]\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Defining the tuning grid"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* **n_estimators**: The number of trees in the forest.\n",
    "\n",
    "* **max_features**: The number of features to consider when looking for the best split. Posible choices: n_features, sqrt(n_features), log2(n_features).\n",
    "    \n",
    "* **max_depth**: The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.\n",
    "\n",
    "Model we have been using:\n",
    "\n",
    "RandomForestRegressor(n_estimators=50, max_depth=16, max_features= 'auto', random_state=55, n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestRegressor\n",
    "RF = RandomForestRegressor(random_state=55, n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "parameter_grid = {'n_estimators': [25,50,75,100], \n",
    "                  'max_depth': [10,15,20,30],\n",
    "                  'max_features': ['auto','sqrt']}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import GridSearchCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=10, error_score='raise',\n",
       "       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
       "           max_features='auto', max_leaf_nodes=None,\n",
       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "           min_samples_leaf=1, min_samples_split=2,\n",
       "           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,\n",
       "           oob_score=False, random_state=55, verbose=0, warm_start=False),\n",
       "       fit_params=None, iid=True, n_jobs=-1,\n",
       "       param_grid={'n_estimators': [25, 50, 75, 100], 'max_depth': [10, 15, 20, 30], 'max_features': ['auto', 'sqrt']},\n",
       "       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n",
       "       scoring='mean_squared_error', verbose=0)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "RF_classifier = GridSearchCV(estimator=RF, param_grid=parameter_grid, refit=True,\n",
    "                             scoring='mean_squared_error', cv=10, n_jobs=-1)\n",
    "\n",
    "RF_classifier.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean_fit_time</th>\n",
       "      <th>mean_score_time</th>\n",
       "      <th>mean_test_score</th>\n",
       "      <th>mean_train_score</th>\n",
       "      <th>param_max_depth</th>\n",
       "      <th>param_max_features</th>\n",
       "      <th>param_n_estimators</th>\n",
       "      <th>params</th>\n",
       "      <th>rank_test_score</th>\n",
       "      <th>split0_test_score</th>\n",
       "      <th>...</th>\n",
       "      <th>split7_test_score</th>\n",
       "      <th>split7_train_score</th>\n",
       "      <th>split8_test_score</th>\n",
       "      <th>split8_train_score</th>\n",
       "      <th>split9_test_score</th>\n",
       "      <th>split9_train_score</th>\n",
       "      <th>std_fit_time</th>\n",
       "      <th>std_score_time</th>\n",
       "      <th>std_test_score</th>\n",
       "      <th>std_train_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.513492</td>\n",
       "      <td>1.439769</td>\n",
       "      <td>-634506.134927</td>\n",
       "      <td>-528058.000445</td>\n",
       "      <td>10</td>\n",
       "      <td>auto</td>\n",
       "      <td>25</td>\n",
       "      <td>{'max_depth': 10, 'max_features': 'auto', 'n_e...</td>\n",
       "      <td>28</td>\n",
       "      <td>-600562.181563</td>\n",
       "      <td>...</td>\n",
       "      <td>-710733.098256</td>\n",
       "      <td>-520676.506538</td>\n",
       "      <td>-657396.630228</td>\n",
       "      <td>-527250.180210</td>\n",
       "      <td>-604578.254796</td>\n",
       "      <td>-535850.704908</td>\n",
       "      <td>1.276463</td>\n",
       "      <td>2.135683</td>\n",
       "      <td>39060.310713</td>\n",
       "      <td>5850.430293</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7.872935</td>\n",
       "      <td>1.502697</td>\n",
       "      <td>-627471.180849</td>\n",
       "      <td>-522750.323802</td>\n",
       "      <td>10</td>\n",
       "      <td>auto</td>\n",
       "      <td>50</td>\n",
       "      <td>{'max_depth': 10, 'max_features': 'auto', 'n_e...</td>\n",
       "      <td>27</td>\n",
       "      <td>-593561.253732</td>\n",
       "      <td>...</td>\n",
       "      <td>-702352.894534</td>\n",
       "      <td>-514821.984959</td>\n",
       "      <td>-646546.519124</td>\n",
       "      <td>-524044.816161</td>\n",
       "      <td>-596190.433193</td>\n",
       "      <td>-527958.066721</td>\n",
       "      <td>1.049025</td>\n",
       "      <td>1.491461</td>\n",
       "      <td>36225.179033</td>\n",
       "      <td>5743.402609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10.642504</td>\n",
       "      <td>2.227475</td>\n",
       "      <td>-625250.042255</td>\n",
       "      <td>-520297.873599</td>\n",
       "      <td>10</td>\n",
       "      <td>auto</td>\n",
       "      <td>75</td>\n",
       "      <td>{'max_depth': 10, 'max_features': 'auto', 'n_e...</td>\n",
       "      <td>26</td>\n",
       "      <td>-595222.201695</td>\n",
       "      <td>...</td>\n",
       "      <td>-702992.782973</td>\n",
       "      <td>-514285.005215</td>\n",
       "      <td>-650177.661258</td>\n",
       "      <td>-526179.198296</td>\n",
       "      <td>-598802.436150</td>\n",
       "      <td>-526941.510860</td>\n",
       "      <td>1.332777</td>\n",
       "      <td>1.391471</td>\n",
       "      <td>37305.145410</td>\n",
       "      <td>5537.339700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>13.102797</td>\n",
       "      <td>1.598602</td>\n",
       "      <td>-622589.325370</td>\n",
       "      <td>-517914.197711</td>\n",
       "      <td>10</td>\n",
       "      <td>auto</td>\n",
       "      <td>100</td>\n",
       "      <td>{'max_depth': 10, 'max_features': 'auto', 'n_e...</td>\n",
       "      <td>25</td>\n",
       "      <td>-593548.336459</td>\n",
       "      <td>...</td>\n",
       "      <td>-700469.451490</td>\n",
       "      <td>-513510.695907</td>\n",
       "      <td>-647467.409274</td>\n",
       "      <td>-522857.509825</td>\n",
       "      <td>-594634.813997</td>\n",
       "      <td>-524046.291247</td>\n",
       "      <td>2.491238</td>\n",
       "      <td>0.796870</td>\n",
       "      <td>36535.205029</td>\n",
       "      <td>4527.169243</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.363837</td>\n",
       "      <td>0.955541</td>\n",
       "      <td>-810008.883519</td>\n",
       "      <td>-695999.613338</td>\n",
       "      <td>10</td>\n",
       "      <td>sqrt</td>\n",
       "      <td>25</td>\n",
       "      <td>{'max_depth': 10, 'max_features': 'sqrt', 'n_e...</td>\n",
       "      <td>32</td>\n",
       "      <td>-749154.479786</td>\n",
       "      <td>...</td>\n",
       "      <td>-869282.771152</td>\n",
       "      <td>-685704.718933</td>\n",
       "      <td>-772334.624101</td>\n",
       "      <td>-693883.214921</td>\n",
       "      <td>-763710.176465</td>\n",
       "      <td>-692217.528111</td>\n",
       "      <td>1.158970</td>\n",
       "      <td>0.878415</td>\n",
       "      <td>37897.304723</td>\n",
       "      <td>10082.249043</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 33 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   mean_fit_time  mean_score_time  mean_test_score  mean_train_score  \\\n",
       "0       1.513492         1.439769   -634506.134927    -528058.000445   \n",
       "1       7.872935         1.502697   -627471.180849    -522750.323802   \n",
       "2      10.642504         2.227475   -625250.042255    -520297.873599   \n",
       "3      13.102797         1.598602   -622589.325370    -517914.197711   \n",
       "4       2.363837         0.955541   -810008.883519    -695999.613338   \n",
       "\n",
       "  param_max_depth param_max_features param_n_estimators  \\\n",
       "0              10               auto                 25   \n",
       "1              10               auto                 50   \n",
       "2              10               auto                 75   \n",
       "3              10               auto                100   \n",
       "4              10               sqrt                 25   \n",
       "\n",
       "                                              params  rank_test_score  \\\n",
       "0  {'max_depth': 10, 'max_features': 'auto', 'n_e...               28   \n",
       "1  {'max_depth': 10, 'max_features': 'auto', 'n_e...               27   \n",
       "2  {'max_depth': 10, 'max_features': 'auto', 'n_e...               26   \n",
       "3  {'max_depth': 10, 'max_features': 'auto', 'n_e...               25   \n",
       "4  {'max_depth': 10, 'max_features': 'sqrt', 'n_e...               32   \n",
       "\n",
       "   split0_test_score       ...         split7_test_score  split7_train_score  \\\n",
       "0     -600562.181563       ...            -710733.098256      -520676.506538   \n",
       "1     -593561.253732       ...            -702352.894534      -514821.984959   \n",
       "2     -595222.201695       ...            -702992.782973      -514285.005215   \n",
       "3     -593548.336459       ...            -700469.451490      -513510.695907   \n",
       "4     -749154.479786       ...            -869282.771152      -685704.718933   \n",
       "\n",
       "   split8_test_score  split8_train_score  split9_test_score  \\\n",
       "0     -657396.630228      -527250.180210     -604578.254796   \n",
       "1     -646546.519124      -524044.816161     -596190.433193   \n",
       "2     -650177.661258      -526179.198296     -598802.436150   \n",
       "3     -647467.409274      -522857.509825     -594634.813997   \n",
       "4     -772334.624101      -693883.214921     -763710.176465   \n",
       "\n",
       "   split9_train_score  std_fit_time  std_score_time  std_test_score  \\\n",
       "0      -535850.704908      1.276463        2.135683    39060.310713   \n",
       "1      -527958.066721      1.049025        1.491461    36225.179033   \n",
       "2      -526941.510860      1.332777        1.391471    37305.145410   \n",
       "3      -524046.291247      2.491238        0.796870    36535.205029   \n",
       "4      -692217.528111      1.158970        0.878415    37897.304723   \n",
       "\n",
       "   std_train_score  \n",
       "0      5850.430293  \n",
       "1      5743.402609  \n",
       "2      5537.339700  \n",
       "3      4527.169243  \n",
       "4     10082.249043  \n",
       "\n",
       "[5 rows x 33 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results = pd.DataFrame(RF_classifier.cv_results_)\n",
    "results.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': 20, 'max_features': 'auto', 'n_estimators': 100}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "RF_classifier.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=20,\n",
       "           max_features='auto', max_leaf_nodes=None,\n",
       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "           min_samples_leaf=1, min_samples_split=2,\n",
       "           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,\n",
       "           oob_score=False, random_state=55, verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "RF_classifier.best_estimator_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Comparing the best model from grid search with our last model using the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "## Random Forests\n",
    "RF_model1 = RandomForestRegressor(n_estimators=50, max_depth=16, random_state=123, n_jobs=-1)\n",
    "RF_model1.fit(X_train, y_train)\n",
    "RF_model1_test_mse = mean_squared_error(y_pred=RF_model1.predict(X_test), y_true=y_test)\n",
    "\n",
    "## Random Forest with tunned parameters \n",
    "RF_tunned_test_mse = mean_squared_error(y_pred=RF_classifier.predict(X_test), y_true=y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "mse_models = pd.Series({'Not Tunned': RF_model1_test_mse, 'Tunned Model':RF_tunned_test_mse})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh4AAAE/CAYAAAAJ28pEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFYxJREFUeJzt3Xuw3lV97/H3R4KJIPdYTwLiliN1xIqAQK1V7GCtSjxK\nrfVSj0WKpdZLtVYrLUebzlSLbelQS6eFOi03FUTQYWyVUWuPWgo0gXArJxIwjKYK5Y43lPA9fzxr\n67Mfsndue6+dZ+f9mnlm/571+z3rt75ZmZ1P1m9lJ1WFJElSD4+Z7wFIkqSdh8FDkiR1Y/CQJEnd\nGDwkSVI3Bg9JktSNwUOSJHVj8JCk7ZDknCR/Mt/jkMaFwUMSSdYn+WGSpSPt1yapJBPt/QFJLkly\nV5L7k9yY5I3t3ES79jsjr9d0GH8leeos9LMyyQWzMaZp+v/XJG+aq/6lcbBovgcgaYfxdeB1wF8D\nJHkmsNvINecD1wFPBh4Cngn8j5Fr9q6qh+d2qJLGlSsekiadD/z60PsTgPNGrjkKOKeqvltVD1fV\ntVX12W25WZLlSS5Lck+SdUl+c+jcyiSfSHJekgeT3JTkyGn6+XI7vG54hSXJy5KsSXJfkiuSHDr0\nmfcm2dD6XpvkhUleAvwh8JrWz3XT3O/wJNe0z14ELBk6t0+SzyT57yT3tuMD2rkPAM8Hzmz9n9na\n/yrJN5I8kGR1kudvy6+nNC4MHpImXQnsmeTpSXYBXguMPna4EvibJK9NcuB23u9C4JvAcuBVwAeT\nHDt0/uXtmr2By4AzN9VJVR3TDp9VVY+vqouSHA78A/BbwH7AWcBlSRYneRrwNuCoqtoDeDGwvqo+\nB3wQuKj186zReyV5LPBpBiFtX+Bi4FeGLnkM8I8MVoQOBL4/Oe6qOhX4CvC21v/b2mf+Azis9fcx\n4OIkS5AWKIOHpGGTqx4vAm4GNoyc/1UGf3i+D/h6W1E4auSau9oqw+Tr6aM3SfIk4OeB91bVD6pq\nDfARpq64fLWq/rmqNrZxPSoIzOBk4KyquqqqNlbVuQweDT0H2AgsBg5JsmtVra+qW7ew3+cAuwJn\nVNWPquqTDIIDAFV1d1VdUlXfq6oHgQ8AL5ipw6q6oH3u4ao6vY3taVtRqzRWDB6Shp0P/BrwRh79\nmIWqureqTqmqZwBPBNYAn06SocuWVtXeQ6+bN3Gf5cA97Q/nSbcD+w+9//bQ8feAJUm2dF/ak4Hf\nGw5AwJOA5VW1DngnsBK4M8mFSZZvYb/LgQ019X/XvH3yIMluSc5KcnuSB4AvA3u3FaRNSvLuJDe3\nzbr3AXsBS6e7Xhp3Bg9JP1ZVtzPYZHoccOlmrr0L+AsGfxjvu5W3+i9g3yR7DLUdyKNXWLbVN4AP\njASg3arq4wBV9bGqeh6DgFLAh9rnNvffdX8L2H8kaA0/cvo9BqsVP1tVewKTj4Emr5/Sf9vP8fvA\nq4F9qmpv4P6h66UFx+AhadRJwLFV9d3RE0k+lORnkixqoeG3gXVVdffW3KCqvgFcAfxpkiVt4+dJ\nPHpPyZa6Azho6P3fA29O8rMZ2D3JiiR7JHlakmOTLAZ+wGAfxiND/Uwkme57478DDwO/k2TXJK8E\njh46v0fr774k+wJ/tJlx7tH6+29gUZL3A3tuZe3SWDF4SJqiqm6tqlXTnN4N+BRwH3AbgxWDl49c\nc9/Iz/F41zR9vQ6YYLD68Sngj6rqC9s47JXAue2xyqvb+H+TwcbOe4F1DB4fwWAPxWnAXQwe5/wU\n8Aft3MXt691Jrhm9SVX9EHhl6+se4DVMXRk6A3hc6/tK4HMjXfwV8Kr2L14+DFzervkag0c2P2Cw\nWiMtWJn6qFKSJGnuuOIhSZK6MXhIkqRuDB6SJKkbg4ckSerG4CFJkrrxf6edA0uXLq2JiYn5HoYk\nSV2sXr36rqp6wpZca/CYAxMTE6xaNd2PQZAkaWFJcvvmrxrwUYskSerG4CFJkroxeEiSpG4MHpIk\nqRuDhyRJ6sbgIUmSujF4SJKkbgwekiSpG4OHJEnqxuAhSZK6MXhIkqRuDB6SJKkbg4ckSerG4CFJ\nkroxeEiSpG4MHpIkqRuDhyRJ6sbgIUmSujF4SJKkbgwekiSpG4OHJEnqxuAhSZK6MXhIkqRuDB6S\nJKkbg4ckSerG4CFJkroxeEiSpG4MHpIkqRuDhyRJ6sbgIUmSujF4SJKkbgwekiSpm0XzPYCF6IYN\n9zNxyj/N9zAkSXqU9aetmNf7u+IhSZK6MXhIkqRuDB6SJKkbg4ckSerG4CFJkroxeEiSpG4MHpIk\nqRuDhyRJ6sbgIUmSujF4SJKkbgwekiSpG4OHJEnqxuAhSZK6MXhIkqRuDB6SJKkbg4ckSerG4CFJ\nkroxeEiSpG4MHpIkqRuDhyRJ6sbgIUmSujF4SJKkbgwekiSpG4OHJEnqxuAhSZK6MXhIkqRuDB6S\nJKkbg4ckSerG4CFJkrqZk+CRpJKcPvT+3UlWbuYzxyc5ZBPtpyZZ014bh45/Zw6GvllJvprksPm4\ntyRJ426uVjweAl6ZZOlWfOZ44FHBo6o+UFWHVdVhwPcnj6vqw7M1WEmS1MdcBY+HgbOB3x09kWQi\nyb8kuT7JF5McmOS5wMuBP2+rGf9zS26S5IIkxw+9/077+out70uTrE1y3tA130yyMsm1bQw/3dof\nn+ScJFe3c/+rte+W5OIkNye5BFiyHb8ukiTt1OZyj8ffAK9PstdI+18D51bVocBHgQ9X1RXAZcB7\n2mrGrbNw/yOAtzFYRXl6kucMnbujqg4HPgK8q7W9H/hcVR0NHAucnmRJ6+Peqno68CfA4bMwNkmS\ndkpzFjyq6gHgPGB0L8bPAR9rx+cDz5ujIVxZVf9VVRuBNcDE0LlL29fVQ+2/BJyaZA3wJQYrGwcC\nxwAXAFTVtcBNm7pZkpOTrEqyauP37p/lUiRJWhgWzXH/ZwDXAP84R/0/TAtPSXZhaj0PDR1vnObc\ncHuA40dXW5Js0UCq6mwGj5dYvOzg2rLhS5K0c5nTf05bVfcAnwBOGmq+AnhtO3498JV2/CCwx1be\nYj3w7Hb8y8Au2zTQgcuBt0++STL5SOXLwK+1tmcBz9iOe0iStFPr8XM8TgeG/3XL24ETk1wPvAF4\nR2u/EHhP29i5RZtLgbOAFyW5jsHei4c2c/1M/hjYPckNSW4CVrb2M4H9ktwMvA+4djvuIUnSTi1V\nPhWYbYuXHVzLTjhjvochSdKjrD9txaz3mWR1VR25Jdf6k0slSVI3Bg9JktSNwUOSJHVj8JAkSd0Y\nPCRJUjcGD0mS1I3BQ5IkdWPwkCRJ3Rg8JElSNwYPSZLUjcFDkiR1Y/CQJEndGDwkSVI3Bg9JktSN\nwUOSJHVj8JAkSd0YPCRJUjcGD0mS1I3BQ5IkdWPwkCRJ3Rg8JElSNwYPSZLUjcFDkiR1Y/CQJEnd\nGDwkSVI3Bg9JktSNwUOSJHVj8JAkSd0YPCRJUjeL5nsAC9Ez99+LVaetmO9hSJK0w3HFQ5IkdWPw\nkCRJ3Rg8JElSNwYPSZLUjcFDkiR1Y/CQJEndGDwkSVI3Bg9JktSNwUOSJHVj8JAkSd0YPCRJUjcG\nD0mS1I3BQ5IkdWPwkCRJ3Rg8JElSNwYPSZLUjcFDkiR1Y/CQJEndGDwkSVI3Bg9JktSNwUOSJHVj\n8JAkSd0YPCRJUjcGD0mS1I3BQ5IkdWPwkCRJ3Rg8JElSNwYPSZLUjcFDkiR1Y/CQJEndGDwkSVI3\nBg9JktSNwUOSJHVj8JAkSd0YPCRJUjcGD0mS1I3BQ5IkdWPwkCRJ3Rg8JElSNwYPSZLUjcFDkiR1\nY/CQJEndGDwkSVI3Bg9JktSNwUOSJHVj8JAkSd0YPCRJUjcGD0mS1I3BQ5IkdWPwkCRJ3Rg8JElS\nNwYPSZLUjcFDkiR1Y/CQJEndGDwkSVI3Bg9JktSNwUOSJHVj8JAkSd0YPCRJUjcGD0mS1I3BQ5Ik\ndWPwkCRJ3Rg8JElSNwYPSZLUjcFDkiR1Y/CQJEndGDwkSVI3i+Z7AAvRDRvuZ+KUf5rvYUiSFoj1\np62Y7yHMGlc8JElSNwYPSZLUjcFDkiR1Y/CQJEndGDwkSVI3Bg9JktSNwUOSJHVj8JAkSd0YPCRJ\nUjcGD0mS1I3BQ5IkdWPwkCRJ3Rg8JElSNwYPSZLUjcFDkiR1Y/CQJEndGDwkSVI3Bg9JktSNwUOS\nJHVj8JAkSd0YPCRJUjcGD0mS1I3BQ5IkdWPwkCRJ3Rg8JElSNwYPSZLUjcFDkiR1Y/CQJEndGDwk\nSVI3MwaPJPslWdNe306yYej9Y3sNso3lF5N8epr2SvLGobYjW9s7t6L/pyZZs73XSJKk6S2a6WRV\n3Q0cBpBkJfCdqvqLDuPaWjcArwHOae9fB1w3b6ORJEmbtE2PWkb/5p/klCT/px1/NclpSa5OsjbJ\nc1v7m5J8MsnlSW5J8qdDn39pkn9Pck2Si5Ls3tpXtD6uAV4xw5BuA/ZMsjTJY4AXAZcP9X9EkquS\nXJ/kkiR7tfajWtsa4M1D1y9K8pethuuTvGlbfp0kSdJUc7XHI1V1NPAe4P1D7c8CfhU4FPjfSZYn\n+SngFOCFVXUEcD3wjiS7AWcBxwHPBpZv5p6XAK8Cng9cBfxo6NwFwLuq6lBgLfC+1n4O8NtVdRiw\ny9D1JwN3thqOAt6a5MCtqF+SJG3CXAWPS9vX1cDEUPsXquqBqvo+8P+AA4HnAocAV7SVh9e3zxwC\nfK2qbq2qAj66mXteBLyawWOWj082JtkPWFJV/9aazgWOSbIUeNxQ+/lDff0ScGIbz1XA3sDBM908\nyclJViVZtfF7929mqJIk7Zxm3OMxg4eZGlqWtLZJD7WvG0fu8dDQ8eS5AJ+rqjcM3yDJkVszoKra\nkCTAC4C3AMduzedHBHhLVX1xZExPneH+ZwNnAyxednBtx70lSVqwtnXF49vA8iT7JFkCrNiOMVwB\nvCDJQQBJdk9yMPCfwMFJntICxeu2oK/3Ae+tqkcmG9oG2e9P7jUB3gD836q6q7X/XGt//VA/lwNv\nSbKojelpSR63HTVKkiS2ccWjqn6Q5IPAKmADg5CwTarqjiQnARcN/RPdP6yqW5K8Gfgs8F3g3xg8\nmpmpr69Oc+oNwN+28LAOOLG1nwh8JMkjwOeHrj+r3WvNIPNwJzNvbpUkSVsgg+0Tmk2Llx1cy044\nY76HIUlaINaftj0PFuZektVVtUVbJPzJpZIkqRuDhyRJ6sbgIUmSujF4SJKkbgwekiSpG4OHJEnq\nxuAhSZK6MXhIkqRuDB6SJKkbg4ckSerG4CFJkroxeEiSpG4MHpIkqRuDhyRJ6sbgIUmSujF4SJKk\nbgwekiSpG4OHJEnqxuAhSZK6MXhIkqRuDB6SJKkbg4ckSerG4CFJkroxeEiSpG4MHpIkqRuDhyRJ\n6sbgIUmSujF4SJKkbgwekiSpm0XzPYCF6Jn778Wq01bM9zAkSdrhuOIhSZK6MXhIkqRuDB6SJKkb\ng4ckSerG4CFJkroxeEiSpG4MHpIkqRuDhyRJ6sbgIUmSujF4SJKkbgwekiSpG4OHJEnqxuAhSZK6\nMXhIkqRuDB6SJKkbg4ckSerG4CFJkroxeEiSpG4MHpIkqRuDhyRJ6sbgIUmSujF4SJKkbgwekiSp\nG4OHJEnqxuAhSZK6MXhIkqRuDB6SJKkbg4ckSerG4CFJkroxeEiSpG4MHpIkqRuDhyRJ6sbgIUmS\nuklVzfcYFpwkDwJr53scs2wpcNd8D2IWLbR6wJrGwUKrBxZeTQutHuhT05Or6glbcuGiOR7Izmpt\nVR0534OYTUlWLaSaFlo9YE3jYKHVAwuvpoVWD+x4NfmoRZIkdWPwkCRJ3Rg85sbZ8z2AObDQalpo\n9YA1jYOFVg8svJoWWj2wg9Xk5lJJktSNKx6SJKkbg8csSvKSJGuTrEtyynyPZ1SS9UluSLImyarW\ntm+Szye5pX3dZ+j6P2i1rE3y4qH2Z7d+1iX5cJK09sVJLmrtVyWZmIMa/iHJnUluHGrrUkOSE9o9\nbklywhzXtDLJhjZXa5IcNy41JXlSki8l+c8kNyV5R2sf23maoaaxnKckS5JcneS6Vs8ft/ZxnqPp\nahrLORrqd5ck1yb5THs/tnP0Y1XlaxZewC7ArcBBwGOB64BD5ntcI2NcDywdafsz4JR2fArwoXZ8\nSKthMfCUVtsu7dzVwHOAAJ8FXtra3wL8XTt+LXDRHNRwDHAEcGPPGoB9gdva133a8T5zWNNK4N2b\nuHaHrwlYBhzRjvcAvtbGPbbzNENNYzlP7d6Pb8e7Ale1MY3zHE1X01jO0dA43wV8DPhMez+2czT5\ncsVj9hwNrKuq26rqh8CFwCvmeUxb4hXAue34XOD4ofYLq+qhqvo6sA44OskyYM+qurIGv0PPG/nM\nZF+fBF44maxnS1V9GbhnHmp4MfD5qrqnqu4FPg+8ZA5rms4OX1NVfauqrmnHDwI3A/szxvM0Q03T\n2aFrqoHvtLe7tlcx3nM0XU3T2eFrSnIAsAL4yMi4x3KOJhk8Zs/+wDeG3n+Tmb8xzYcCvpBkdZKT\nW9sTq+pb7fjbwBPb8XT17N+OR9unfKaqHgbuB/ab7SI2oUcN8zG/b09yfQaPYiaXU8eqprZ0eziD\nv30uiHkaqQnGdJ7aEv4a4E4Gf8iM/RxNUxOM6RwBZwC/Dzwy1DbWcwQGj53N86rqMOClwFuTHDN8\nsqXhsf5nTguhhuZvGTy2Owz4FnD6/A5n6yV5PHAJ8M6qemD43LjO0yZqGtt5qqqN7fvBAQz+Zvwz\nI+fHbo6mqWks5yjJy4A7q2r1dNeM4xyBwWM2bQCeNPT+gNa2w6iqDe3rncCnGDweuqMtxdG+3tku\nn66eDe14tH3KZ5IsAvYC7p6LWkb0qKHr/FbVHe2b6CPA3zOYqynjGxnHDlVTkl0Z/AH90aq6tDWP\n9TxtqqZxn6dWw33AlxgspY/1HE0armmM5+jngZcnWc/g0f2xSS5gIczRdJs/fG31BqBFDDbgPIWf\nbC59xnyPa2h8uwN7DB1fweAbzZ8zdaPSn7XjZzB1o9JtTL9R6bjW/lamblT6xBzVMsHUjZhzXgOD\nTVZfZ7DRap92vO8c1rRs6Ph3GTy7HYua2v3PA84YaR/beZqhprGcJ+AJwN7t+HHAV4CXjfkcTVfT\nWM7RSG2/wE82l47tHP24ntnqyFcBHMdgt/utwKnzPZ6RsR3UflNeB9w0OT4Gz/O+CNwCfGH4Nxdw\naqtlLW0XdGs/ErixnTuTn/wguiXAxQw2NV0NHDQHdXycwXLpjxg8dzypVw3Ab7T2dcCJc1zT+cAN\nwPXAZUz95rlD1wQ8j8Hy7/XAmvY6bpznaYaaxnKegEOBa9u4bwTe3/P7wRzN0XQ1jeUcjdT2C/wk\neIztHE2+/MmlkiSpG/d4SJKkbgwekiSpG4OHJEnqxuAhSZK6MXhIkqRuDB6SJKkbg4ckSerG4CFJ\nkrr5//JY5rBIzl4AAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x2d368e74ac8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "fig, ax = plt.subplots(figsize=(8,5))\n",
    "mse_models.sort_values().plot(kind='barh', ax=ax)\n",
    "ax.set_title('MSE on test data');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-4.580267005058392"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "100*(RF_tunned_test_mse-RF_model1_test_mse)/RF_model1_test_mse"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Excercise: tune the hyper-parameters of the credit card default dataset"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
